# Immigration and the top 1%

# Code to prepare data 


# Functions to calculate top income shares

## These are functions used to calculate summary statistics (mean, proportion) for individuals in top share


# # # #

# size of the top share, in number of individuals
# input: year (y) and top share (topshare, a fraction such as 0.01)
# output: the control total for year y divided by 1 / topshare, rounded to a
#         whole number (a single value when pop_control_tot holds one row
#         for that year)
# note: reads pop_control_tot and var_control from the calling environment,
#       they are not passed as arguments
how_many_top <- function(y, topshare) {
  # the population is cut into 1 / topshare groups of equal size
  n_groups <- 1 / topshare

  # control total recorded for the requested year
  control_total <- as_vector(
    select(
      filter(pop_control_tot, year == y),
      var_control
    )
  )

  # one group's worth of individuals, rounded to the nearest whole number
  round(control_total / n_groups, digits = 0)
}


# read the pre-tax csv and order it by income
# input: tax year (y, not used inside the function) and path of the csv file
# output: data frame sorted by decreasing ti
upload_pretax <- function(y, file_name) {

  # every column is parsed as double, except the two character identifiers
  column_spec <- cols(.default = col_double(),
                      uk_resid = col_character(),
                      nino_anon = col_character())

  # read, then put the highest ti first
  read_csv(file = file_name, col_types = column_spec) %>%
    arrange(desc(ti))

}


# read the post-tax csv and order it by post-tax income
# input: tax year (y, not used inside the function) and path of the csv file
# output: data frame sorted by decreasing ti_posttax
upload_posttax <- function(y, file_name) {

  # all columns are doubles unless listed here as character
  column_spec <- cols(.default = col_double(),
                      uk_resid = col_character(),
                      nino_anon = col_character())

  # raw file as read from disk
  raw <- read_csv(file = file_name, col_types = column_spec)

  # highest ti_posttax first
  arrange(raw, desc(ti_posttax))

}


# subset sample. We keep rows that are:
# 1. UK resident (uk_resid == 'Y')
# 2. at least `age` years old in the tax year (tax_year - yob_mode >= age)
# Rows with a missing year of birth are NOT dropped by the age filter: their
# yob_mode is temporarily set to 1000 so that they pass it, and is set back
# to NA afterwards (any yob_mode equal to 1000 therefore ends up as NA).
# No filter on the migrant dummy is applied here.
# input: a data frame, age limit
# output: a data frame
# note: while this definition is in scope it masks base::subset
subset <- function(data, age) {

  data %>%
    # UK resident only
    filter(uk_resid == 'Y') %>%
    # placeholder year of birth so that missing values survive the age filter
    mutate(yob_mode = if_else(is.na(yob_mode), 1000, yob_mode)) %>%
    # keep individuals aged `age` or more
    filter(tax_year - yob_mode >= age) %>%
    # turn the placeholder back into a missing value
    mutate(yob_mode = if_else(yob_mode == 1000, NA_real_, yob_mode))

}


# keep individuals in top share (regardless of migrant status)
# input:
#   data  - data frame with a column w_rank; rows are taken by position, so
#           the ordering of `data` decides who is in the top
#           (presumably sorted richest first, with w_rank the cumulative
#           weighted rank - TODO confirm against caller)
#   n_top - number of individuals in top share
# output: data frame with rows 1 up to and including the first row whose
#         w_rank is >= n_top. If no row reaches n_top (including empty
#         input) all rows are returned and a warning is raised, instead of
#         failing on an infinite index.
top_all <- function(data, n_top) {

  stopifnot("w_rank" %in% names(data))

  # rows at or beyond the cutoff; which() skips rows with missing w_rank
  # and returns positions in increasing order
  at_or_past_cutoff <- which(data[["w_rank"]] >= n_top)

  # nobody reaches the cutoff: the whole sample is inside the top share
  if (length(at_or_past_cutoff) == 0) {
    warning("top_all: no w_rank value reaches n_top; returning all rows",
            call. = FALSE)
    return(data)
  }

  # individual at cutoff based on weighted ranking
  ind <- at_or_past_cutoff[1]

  # individuals in top share; drop = FALSE keeps a data frame even when
  # `data` has a single column
  data[seq_len(ind), , drop = FALSE]
}


# find replacement of migrants in top share among the following brits
# input:
#   df            - data frame whose first n_top rows are the top share; rows
#                   are taken by position, so df must already be ordered
#   n_top         - number of rows (individuals) in the top share
#   n_migrant     - number of migrants in the top, i.e. replacements needed
#   dummy_migrant - migrant dummy column, as a symbol/quosure (it is injected
#                   with !!) or as a single column-name string
# output: data frame with the brits that replace migrants: the first
#         n_migrant rows below the top share whose dummy equals 0. If fewer
#         such rows exist, only those are returned and a warning is raised
#         (no all-NA padding rows).
brits_replacement <- function(df, n_top, n_migrant, dummy_migrant) {

  # a plain column name would otherwise be compared to 0 as a string
  if (is.character(dummy_migrant) && length(dummy_migrant) == 1) {
    dummy_migrant <- as.name(dummy_migrant)
  }

  # drop individuals in the top; a logical mask stays correct when
  # n_top is 0 or n_top >= nrow(df), where (n_top + 1):nrow(df) would
  # count backwards
  below_top <- df[seq_len(nrow(df)) > n_top, , drop = FALSE]

  # drop migrants
  candidates <- below_top %>%
    filter((!!dummy_migrant) == 0)

  # not enough non-migrants below the top to replace every migrant
  if (nrow(candidates) < n_migrant) {
    warning("brits_replacement: only ", nrow(candidates),
            " replacement(s) available for ", n_migrant, " migrant(s)",
            call. = FALSE)
  }

  # find brits replacement; head() gives zero rows for n_migrant = 0,
  # whereas 1:0 would pick row 1
  head(candidates, n_migrant)

}


# summary table PRE-TAX
# input: dataframe with columns tax_year, tei, tii, ti, emp_inc,
#        self_emp_inc, tot_pensions and any number of columns whose name
#        starts with "migrant"
# output: a one-row table (for ungrouped input) with
#         - the mean of tax_year and of each income column
#         - the sum of each migrant* column (the number of migrants when
#           these are 0/1 dummies), ignoring missing values
#         - n, the number of rows of df
sumstat_pretax <- function(df) {

  # Average income by income category; no na.rm here, so a missing value
  # in a column makes that column's mean NA
  income_means <- df %>%
    select(tax_year, tei, tii, ti, emp_inc, self_emp_inc, tot_pensions) %>%
    summarise_all("mean")

  # Count number of migrants (TRUE spelled out: T can be reassigned)
  migrant_counts <- df %>%
    select(starts_with("migrant")) %>%
    summarise_all("sum", na.rm = TRUE)

  # Combine
  sum_stats <- cbind(income_means, migrant_counts)

  # attach number of observations; assigned directly so the value cannot
  # be confused with a column that happens to be called n
  sum_stats$n <- nrow(df)

  sum_stats
}


# summary table POST-TAX
# input: dataframe with columns tax_year, ti_posttax and any number of
#        columns whose name starts with "migrant"
# output: a one-row table (for ungrouped input) with
#         - the mean of tax_year and of ti_posttax
#         - the sum of each migrant* column (the number of migrants when
#           these are 0/1 dummies), ignoring missing values
#         - n, the number of rows of df
sumstat_posttax <- function(df) {

  # Average post tax income; no na.rm here, so a missing value makes the
  # mean NA
  income_means <- df %>%
    select(tax_year, ti_posttax) %>%
    summarise_all("mean")

  # Count number of migrants (TRUE spelled out: T can be reassigned)
  migrant_counts <- df %>%
    select(starts_with("migrant")) %>%
    summarise_all("sum", na.rm = TRUE)

  # Combine
  sum_stats <- cbind(income_means, migrant_counts)

  # add number of observations; assigned directly so the value cannot be
  # confused with a column that happens to be called n
  sum_stats$n <- nrow(df)

  sum_stats
}



# # # # 



